In [64]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
In [65]:
train = pd.read_csv('/Users/urvaj/Documents/Data Science for Consulting/Module 7/fraudTest.csv')

test = pd.read_csv('/Users/urvaj/Documents/Data Science for Consulting/Module 7/fraudTrain.csv')

df = pd.concat([train, test])
df.shape
Out[65]:
(1852394, 23)
In [66]:
train
Out[66]:
Unnamed: 0 trans_date_trans_time cc_num merchant category amt first last gender street ... lat long city_pop job dob trans_num unix_time merch_lat merch_long is_fraud
0 0 2020-06-21 12:14:25 2291163933867244 fraud_Kirlin and Sons personal_care 2.86 Jeff Elliott M 351 Darlene Green ... 33.9659 -80.9355 333497 Mechanical engineer 1968-03-19 2da90c7d74bd46a0caf3777415b3ebd3 1371816865 33.986391 -81.200714 0
1 1 2020-06-21 12:14:33 3573030041201292 fraud_Sporer-Keebler personal_care 29.84 Joanne Williams F 3638 Marsh Union ... 40.3207 -110.4360 302 Sales professional, IT 1990-01-17 324cc204407e99f51b0d6ca0055005e7 1371816873 39.450498 -109.960431 0
2 2 2020-06-21 12:14:53 3598215285024754 fraud_Swaniawski, Nitzsche and Welch health_fitness 41.28 Ashley Lopez F 9333 Valentine Point ... 40.6729 -73.5365 34496 Librarian, public 1970-10-21 c81755dbbbea9d5c77f094348a7579be 1371816893 40.495810 -74.196111 0
3 3 2020-06-21 12:15:15 3591919803438423 fraud_Haley Group misc_pos 60.05 Brian Williams M 32941 Krystal Mill Apt. 552 ... 28.5697 -80.8191 54767 Set designer 1987-07-25 2159175b9efe66dc301f149d3d5abf8c 1371816915 28.812398 -80.883061 0
4 4 2020-06-21 12:15:17 3526826139003047 fraud_Johnston-Casper travel 3.19 Nathan Massey M 5783 Evan Roads Apt. 465 ... 44.2529 -85.0170 1126 Furniture designer 1955-07-06 57ff021bd3f328f8738bb535c302a31b 1371816917 44.959148 -85.884734 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
555714 555714 2020-12-31 23:59:07 30560609640617 fraud_Reilly and Sons health_fitness 43.77 Michael Olson M 558 Michael Estates ... 40.4931 -91.8912 519 Town planner 1966-02-13 9b1f753c79894c9f4b71f04581835ada 1388534347 39.946837 -91.333331 0
555715 555715 2020-12-31 23:59:09 3556613125071656 fraud_Hoppe-Parisian kids_pets 111.84 Jose Vasquez M 572 Davis Mountains ... 29.0393 -95.4401 28739 Futures trader 1999-12-27 2090647dac2c89a1d86c514c427f5b91 1388534349 29.661049 -96.186633 0
555716 555716 2020-12-31 23:59:15 6011724471098086 fraud_Rau-Robel kids_pets 86.88 Ann Lawson F 144 Evans Islands Apt. 683 ... 46.1966 -118.9017 3684 Musician 1981-11-29 6c5b7c8add471975aa0fec023b2e8408 1388534355 46.658340 -119.715054 0
555717 555717 2020-12-31 23:59:24 4079773899158 fraud_Breitenberg LLC travel 7.99 Eric Preston M 7020 Doyle Stream Apt. 951 ... 44.6255 -116.4493 129 Cartographer 1965-12-15 14392d723bb7737606b2700ac791b7aa 1388534364 44.470525 -117.080888 0
555718 555718 2020-12-31 23:59:34 4170689372027579 fraud_Dare-Marvin entertainment 38.13 Samuel Frey M 830 Myers Plaza Apt. 384 ... 35.6665 -97.4798 116001 Media buyer 1993-05-10 1765bb45b3aa3224b4cdcb6e7a96cee3 1388534374 36.210097 -97.036372 0

555719 rows × 23 columns

In [67]:
train.columns
Out[67]:
Index(['Unnamed: 0', 'trans_date_trans_time', 'cc_num', 'merchant', 'category',
       'amt', 'first', 'last', 'gender', 'street', 'city', 'state', 'zip',
       'lat', 'long', 'city_pop', 'job', 'dob', 'trans_num', 'unix_time',
       'merch_lat', 'merch_long', 'is_fraud'],
      dtype='object')
In [68]:
test
Out[68]:
Unnamed: 0 trans_date_trans_time cc_num merchant category amt first last gender street ... lat long city_pop job dob trans_num unix_time merch_lat merch_long is_fraud
0 0 2019-01-01 00:00:18 2703186189652095 fraud_Rippin, Kub and Mann misc_net 4.97 Jennifer Banks F 561 Perry Cove ... 36.0788 -81.1781 3495 Psychologist, counselling 1988-03-09 0b242abb623afc578575680df30655b9 1325376018 36.011293 -82.048315 0
1 1 2019-01-01 00:00:44 630423337322 fraud_Heller, Gutmann and Zieme grocery_pos 107.23 Stephanie Gill F 43039 Riley Greens Suite 393 ... 48.8878 -118.2105 149 Special educational needs teacher 1978-06-21 1f76529f8574734946361c461b024d99 1325376044 49.159047 -118.186462 0
2 2 2019-01-01 00:00:51 38859492057661 fraud_Lind-Buckridge entertainment 220.11 Edward Sanchez M 594 White Dale Suite 530 ... 42.1808 -112.2620 4154 Nature conservation officer 1962-01-19 a1a22d70485983eac12b5b88dad1cf95 1325376051 43.150704 -112.154481 0
3 3 2019-01-01 00:01:16 3534093764340240 fraud_Kutch, Hermiston and Farrell gas_transport 45.00 Jeremy White M 9443 Cynthia Court Apt. 038 ... 46.2306 -112.1138 1939 Patent attorney 1967-01-12 6b849c168bdad6f867558c3793159a81 1325376076 47.034331 -112.561071 0
4 4 2019-01-01 00:03:06 375534208663984 fraud_Keeling-Crist misc_pos 41.96 Tyler Garcia M 408 Bradley Rest ... 38.4207 -79.4629 99 Dance movement psychotherapist 1986-03-28 a41d7549acf90789359a9aa5346dcb46 1325376186 38.674999 -78.632459 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1296670 1296670 2020-06-21 12:12:08 30263540414123 fraud_Reichel Inc entertainment 15.56 Erik Patterson M 162 Jessica Row Apt. 072 ... 37.7175 -112.4777 258 Geoscientist 1961-11-24 440b587732da4dc1a6395aba5fb41669 1371816728 36.841266 -111.690765 0
1296671 1296671 2020-06-21 12:12:19 6011149206456997 fraud_Abernathy and Sons food_dining 51.70 Jeffrey White M 8617 Holmes Terrace Suite 651 ... 39.2667 -77.5101 100 Production assistant, television 1979-12-11 278000d2e0d2277d1de2f890067dcc0a 1371816739 38.906881 -78.246528 0
1296672 1296672 2020-06-21 12:12:32 3514865930894695 fraud_Stiedemann Ltd food_dining 105.93 Christopher Castaneda M 1632 Cohen Drive Suite 639 ... 32.9396 -105.8189 899 Naval architect 1967-08-30 483f52fe67fabef353d552c1e662974c 1371816752 33.619513 -105.130529 0
1296673 1296673 2020-06-21 12:13:36 2720012583106919 fraud_Reinger, Weissnat and Strosin food_dining 74.90 Joseph Murray M 42933 Ryan Underpass ... 43.3526 -102.5411 1126 Volunteer coordinator 1980-08-18 d667cdcbadaaed3da3f4020e83591c83 1371816816 42.788940 -103.241160 0
1296674 1296674 2020-06-21 12:13:37 4292902571056973207 fraud_Langosh, Wintheiser and Hyatt food_dining 4.30 Jeffrey Smith M 135 Joseph Mountains ... 45.8433 -113.8748 218 Therapist, horticultural 1995-08-16 8f7c8e4ab7f25875d753b422917c98c9 1371816817 46.565983 -114.186110 0

1296675 rows × 23 columns

In [69]:
px.histogram(x=train["trans_date_trans_time"])
In [70]:
px.histogram(x=test["trans_date_trans_time"])
In [71]:
train.drop("Unnamed: 0",axis=1,inplace=True) 
test.drop("Unnamed: 0",axis=1,inplace=True) 
train = train.drop(['cc_num','first','last','trans_num'],axis=1)
test = test.drop(['cc_num','first','last','trans_num'],axis=1)

 # to remove col Unnamed : 0 
In [72]:
from datetime import datetime as dt
train["trans_date_trans_time"] = pd.to_datetime(train["trans_date_trans_time"])
train["trans_date"] = train["trans_date_trans_time"].dt.date
train["trans_date"]= pd.to_datetime(train["trans_date"])

train['year'] = train['trans_date'].dt.year
train['month'] = train['trans_date'].dt.month
train['day'] = train['trans_date'].dt.day

test["trans_date_trans_time"] = pd.to_datetime(test["trans_date_trans_time"])
test["trans_date"] = test["trans_date_trans_time"].dt.date
test["trans_date"]= pd.to_datetime(test["trans_date"])

test['year'] = test['trans_date'].dt.year
test['month'] = test['trans_date'].dt.month
test['day'] = test['trans_date'].dt.day
In [73]:
train
Out[73]:
trans_date_trans_time merchant category amt gender street city state zip lat ... job dob unix_time merch_lat merch_long is_fraud trans_date year month day
0 2020-06-21 12:14:25 fraud_Kirlin and Sons personal_care 2.86 M 351 Darlene Green Columbia SC 29209 33.9659 ... Mechanical engineer 1968-03-19 1371816865 33.986391 -81.200714 0 2020-06-21 2020 6 21
1 2020-06-21 12:14:33 fraud_Sporer-Keebler personal_care 29.84 F 3638 Marsh Union Altonah UT 84002 40.3207 ... Sales professional, IT 1990-01-17 1371816873 39.450498 -109.960431 0 2020-06-21 2020 6 21
2 2020-06-21 12:14:53 fraud_Swaniawski, Nitzsche and Welch health_fitness 41.28 F 9333 Valentine Point Bellmore NY 11710 40.6729 ... Librarian, public 1970-10-21 1371816893 40.495810 -74.196111 0 2020-06-21 2020 6 21
3 2020-06-21 12:15:15 fraud_Haley Group misc_pos 60.05 M 32941 Krystal Mill Apt. 552 Titusville FL 32780 28.5697 ... Set designer 1987-07-25 1371816915 28.812398 -80.883061 0 2020-06-21 2020 6 21
4 2020-06-21 12:15:17 fraud_Johnston-Casper travel 3.19 M 5783 Evan Roads Apt. 465 Falmouth MI 49632 44.2529 ... Furniture designer 1955-07-06 1371816917 44.959148 -85.884734 0 2020-06-21 2020 6 21
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
555714 2020-12-31 23:59:07 fraud_Reilly and Sons health_fitness 43.77 M 558 Michael Estates Luray MO 63453 40.4931 ... Town planner 1966-02-13 1388534347 39.946837 -91.333331 0 2020-12-31 2020 12 31
555715 2020-12-31 23:59:09 fraud_Hoppe-Parisian kids_pets 111.84 M 572 Davis Mountains Lake Jackson TX 77566 29.0393 ... Futures trader 1999-12-27 1388534349 29.661049 -96.186633 0 2020-12-31 2020 12 31
555716 2020-12-31 23:59:15 fraud_Rau-Robel kids_pets 86.88 F 144 Evans Islands Apt. 683 Burbank WA 99323 46.1966 ... Musician 1981-11-29 1388534355 46.658340 -119.715054 0 2020-12-31 2020 12 31
555717 2020-12-31 23:59:24 fraud_Breitenberg LLC travel 7.99 M 7020 Doyle Stream Apt. 951 Mesa ID 83643 44.6255 ... Cartographer 1965-12-15 1388534364 44.470525 -117.080888 0 2020-12-31 2020 12 31
555718 2020-12-31 23:59:34 fraud_Dare-Marvin entertainment 38.13 M 830 Myers Plaza Apt. 384 Edmond OK 73034 35.6665 ... Media buyer 1993-05-10 1388534374 36.210097 -97.036372 0 2020-12-31 2020 12 31

555719 rows × 22 columns

In [74]:
test
Out[74]:
trans_date_trans_time merchant category amt gender street city state zip lat ... job dob unix_time merch_lat merch_long is_fraud trans_date year month day
0 2019-01-01 00:00:18 fraud_Rippin, Kub and Mann misc_net 4.97 F 561 Perry Cove Moravian Falls NC 28654 36.0788 ... Psychologist, counselling 1988-03-09 1325376018 36.011293 -82.048315 0 2019-01-01 2019 1 1
1 2019-01-01 00:00:44 fraud_Heller, Gutmann and Zieme grocery_pos 107.23 F 43039 Riley Greens Suite 393 Orient WA 99160 48.8878 ... Special educational needs teacher 1978-06-21 1325376044 49.159047 -118.186462 0 2019-01-01 2019 1 1
2 2019-01-01 00:00:51 fraud_Lind-Buckridge entertainment 220.11 M 594 White Dale Suite 530 Malad City ID 83252 42.1808 ... Nature conservation officer 1962-01-19 1325376051 43.150704 -112.154481 0 2019-01-01 2019 1 1
3 2019-01-01 00:01:16 fraud_Kutch, Hermiston and Farrell gas_transport 45.00 M 9443 Cynthia Court Apt. 038 Boulder MT 59632 46.2306 ... Patent attorney 1967-01-12 1325376076 47.034331 -112.561071 0 2019-01-01 2019 1 1
4 2019-01-01 00:03:06 fraud_Keeling-Crist misc_pos 41.96 M 408 Bradley Rest Doe Hill VA 24433 38.4207 ... Dance movement psychotherapist 1986-03-28 1325376186 38.674999 -78.632459 0 2019-01-01 2019 1 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1296670 2020-06-21 12:12:08 fraud_Reichel Inc entertainment 15.56 M 162 Jessica Row Apt. 072 Hatch UT 84735 37.7175 ... Geoscientist 1961-11-24 1371816728 36.841266 -111.690765 0 2020-06-21 2020 6 21
1296671 2020-06-21 12:12:19 fraud_Abernathy and Sons food_dining 51.70 M 8617 Holmes Terrace Suite 651 Tuscarora MD 21790 39.2667 ... Production assistant, television 1979-12-11 1371816739 38.906881 -78.246528 0 2020-06-21 2020 6 21
1296672 2020-06-21 12:12:32 fraud_Stiedemann Ltd food_dining 105.93 M 1632 Cohen Drive Suite 639 High Rolls Mountain Park NM 88325 32.9396 ... Naval architect 1967-08-30 1371816752 33.619513 -105.130529 0 2020-06-21 2020 6 21
1296673 2020-06-21 12:13:36 fraud_Reinger, Weissnat and Strosin food_dining 74.90 M 42933 Ryan Underpass Manderson SD 57756 43.3526 ... Volunteer coordinator 1980-08-18 1371816816 42.788940 -103.241160 0 2020-06-21 2020 6 21
1296674 2020-06-21 12:13:37 fraud_Langosh, Wintheiser and Hyatt food_dining 4.30 M 135 Joseph Mountains Sula MT 59871 45.8433 ... Therapist, horticultural 1995-08-16 1371816817 46.565983 -114.186110 0 2020-06-21 2020 6 21

1296675 rows × 22 columns

In [75]:
train["is_fraud"].value_counts()
Out[75]:
0    553574
1      2145
Name: is_fraud, dtype: int64
In [76]:
pd.crosstab(train["category"],train["is_fraud"])
Out[76]:
is_fraud 0 1
category
entertainment 40045 59
food_dining 39214 54
gas_transport 56216 154
grocery_net 19385 41
grocery_pos 52068 485
health_fitness 36622 52
home 52278 67
kids_pets 48627 65
misc_net 27100 267
misc_pos 34502 72
personal_care 39257 70
shopping_net 41273 506
shopping_pos 49578 213
travel 17409 40
In [77]:
pd.crosstab(test["category"],test["is_fraud"])
Out[77]:
is_fraud 0 1
category
entertainment 93781 233
food_dining 91310 151
gas_transport 131041 618
grocery_net 45318 134
grocery_pos 121895 1743
health_fitness 85746 133
home 122917 198
kids_pets 112796 239
misc_net 62372 915
misc_pos 79405 250
personal_care 90538 220
shopping_net 95830 1713
shopping_pos 115829 843
travel 40391 116
In [96]:
train.groupby("is_fraud")['amt'].mean()
Out[96]:
is_fraud
0     67.614408
1    528.356494
Name: amt, dtype: float64
In [97]:
pd.crosstab(train["category"],train["is_fraud"])
Out[97]:
is_fraud 0 1
category
entertainment 40045 59
food_dining 39214 54
gas_transport 56216 154
grocery_net 19385 41
grocery_pos 52068 485
health_fitness 36622 52
home 52278 67
kids_pets 48627 65
misc_net 27100 267
misc_pos 34502 72
personal_care 39257 70
shopping_net 41273 506
shopping_pos 49578 213
travel 17409 40
In [78]:
train.columns
Out[78]:
Index(['trans_date_trans_time', 'merchant', 'category', 'amt', 'gender',
       'street', 'city', 'state', 'zip', 'lat', 'long', 'city_pop', 'job',
       'dob', 'unix_time', 'merch_lat', 'merch_long', 'is_fraud', 'trans_date',
       'year', 'month', 'day'],
      dtype='object')
In [85]:
# date conversion
df_train = pd.get_dummies(train,columns=['year','month','day'],drop_first=True)
df_test = pd.get_dummies(test,columns=['year','month','day'],drop_first=True)

df_train.columns = df_train.columns.str.replace(' ', '')
df_test.columns = df_test.columns.str.replace(' ', '')

df_train = df_train.select_dtypes(include='number')
df_test = df_test.select_dtypes(include='number')
In [86]:
total = pd.concat([df_train, df_test])

X = total.drop("is_fraud",axis=1) 
y = total["is_fraud"]

X.fillna(0, inplace = True)
X = X.drop(['zip','lat','long','unix_time','merch_lat','merch_long'],axis=1)
In [87]:
X
Out[87]:
amt city_pop month_7 month_8 month_9 month_10 month_11 month_12 day_2 day_3 ... day_28 day_29 day_30 day_31 year_2020 month_2 month_3 month_4 month_5 month_6
0 2.86 333497 0 0 0 0 0 0 0 0 ... 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
1 29.84 302 0 0 0 0 0 0 0 0 ... 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
2 41.28 34496 0 0 0 0 0 0 0 0 ... 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
3 60.05 54767 0 0 0 0 0 0 0 0 ... 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
4 3.19 1126 0 0 0 0 0 0 0 0 ... 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1296670 15.56 258 0 0 0 0 0 0 0 0 ... 0 0 0 0 1.0 0.0 0.0 0.0 0.0 1.0
1296671 51.70 100 0 0 0 0 0 0 0 0 ... 0 0 0 0 1.0 0.0 0.0 0.0 0.0 1.0
1296672 105.93 899 0 0 0 0 0 0 0 0 ... 0 0 0 0 1.0 0.0 0.0 0.0 0.0 1.0
1296673 74.90 1126 0 0 0 0 0 0 0 0 ... 0 0 0 0 1.0 0.0 0.0 0.0 0.0 1.0
1296674 4.30 218 0 0 0 0 0 0 0 0 ... 0 0 0 0 1.0 0.0 0.0 0.0 0.0 1.0

1852394 rows × 44 columns

Machine Learning and Evaluation

In [88]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
 X, y, test_size=0.3, random_state=42)
In [89]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
In [90]:
X_test
Out[90]:
array([[3.32020958e-03, 2.96558579e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.15349000e-04, 5.71298428e-02, 0.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       [2.61331911e-03, 2.13301994e-05, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [3.51048370e-04, 6.88070948e-05, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.16823993e-03, 4.00113256e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [8.03871267e-05, 3.79471128e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])
In [91]:
X_train
Out[91]:
array([[3.13731043e-03, 3.18525244e-02, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.33855628e-03, 4.59975429e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [3.24055995e-03, 1.91077302e-03, 0.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       ...,
       [7.41110308e-03, 1.14219777e-03, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.70218745e-03, 2.09517604e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.42488874e-03, 4.27065340e-01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])
In [92]:
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier()
dtc.fit(X_train,y_train)

np.nan_to_num(X_train)
Out[92]:
array([[3.13731043e-03, 3.18525244e-02, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.33855628e-03, 4.59975429e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [3.24055995e-03, 1.91077302e-03, 0.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       ...,
       [7.41110308e-03, 1.14219777e-03, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.70218745e-03, 2.09517604e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.42488874e-03, 4.27065340e-01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])
In [93]:
from sklearn.metrics import accuracy_score, mean_absolute_error ,mean_squared_error, confusion_matrix, median_absolute_error,classification_report, f1_score,recall_score,precision_score

print("Score the X-train with Y-train is : ", dtc.score(X_train,y_train))
print("Score the X-test  with Y-test  is : ", dtc.score(X_test,y_test))

y_pred=dtc.predict(X_test)

print("Accuracy score " , accuracy_score(y_test,y_pred))
print("F1 score: ", round(f1_score(y_test, y_pred, average='weighted')*100,2),"%")
Score the X-train with Y-train is :  0.9999992287967301
Score the X-test  with Y-test  is :  0.993235790030573
Accuracy score  0.993235790030573
F1 score:  99.33 %
In [94]:
matrix = classification_report(y_test,y_pred,labels=[1,0])
print('Classification report : \n',matrix)
Classification report : 
               precision    recall  f1-score   support

           1       0.35      0.36      0.35      2890
           0       1.00      1.00      1.00    552829

    accuracy                           0.99    555719
   macro avg       0.67      0.68      0.68    555719
weighted avg       0.99      0.99      0.99    555719